alldata <- readr::read_csv(here::here("data/ldn_stop_search_with_lnglat_36m.csv"), show_col_types = FALSE)
offence <- readr::read_csv(here::here("data/ldn_stop_search_offence_36m.csv"), show_col_types = FALSE)The map of London city
# make sure you have the same direcory stucture to get London wards shapefile
london_wards_sf <- read_sf(here::here("data/London-wards-2018/London-wards-2018_ESRI/London_Ward.shp"))
# st_geometry(london_wards_sf) # what is the geometry ?
# london_wards_sf = projected CRS: OSGB 1936 / British National Grid
# change the CRS to use WGS84 lng/lat pairs
london_wgs84 <- london_wards_sf %>%
st_transform(4326) # transform CRS to WGS84, latitude/longitude
st_geometry(london_wgs84) # what is the geometry ?## Geometry set for 657 features
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -0.51 ymin: 51.3 xmax: 0.334 ymax: 51.7
## Geodetic CRS: WGS 84
## First 5 geometries:
ggplot() +
geom_sf(data = london_wgs84, fill = "#3B454A", size = 0.125, colour = "#b2b2b277")get_sf <- function(data) {
sf_all <- st_as_sf(data,
coords=c('lng', 'lat'),
crs = 4326)
# filter points using the map
sf_within <- st_filter(sf_all, london_wgs84)
return(sf_within)
}Stop-and-search cases surged in mid 2020 over the past 36 months. Most objects of search cases are on “controlled drugs”.
alldata %>%
mutate(controlled_drugs = ifelse(object_of_search == "Controlled drugs", "Controlled drugs", "Other")) %>%
mutate(controlled_drugs = ifelse(is.na(controlled_drugs), "Other", controlled_drugs)) %>%
group_by(year, month, controlled_drugs) %>%
summarize(n_case = n()) %>%
mutate(month_year = as.Date(paste(year, "-", month, "-01", sep=""))) %>%
# label for the graph
mutate(label = if_else(month_year == as.Date("2022-09-01"), as.character(controlled_drugs), NA_character_)) %>%
filter(month_year >= "2019-10-01") %>%
ggplot(aes(x = month_year, y = n_case, group = controlled_drugs, color = controlled_drugs)) +
geom_vline(
xintercept = c(as.Date("2019-10-01"), as.Date("2020-01-01"), as.Date("2020-04-01"), as.Date("2020-07-01"),
as.Date("2020-10-01"), as.Date("2021-01-01"), as.Date("2021-04-01"), as.Date("2021-07-01"),
as.Date("2021-10-01"), as.Date("2022-01-01"), as.Date("2022-04-01"), as.Date("2022-07-01"),
as.Date("2022-09-01")), color = "grey80", size = .4) +
geom_segment(
data = tibble(y = seq(0, 45000, by = 2500), x1 = as.Date("2019-10-01"), x2 = as.Date("2022-09-01")),
aes(x = x1, xend = x2, y = y, yend = y),
inherit.aes = FALSE,
color = "grey80",
size = .4
) +
geom_segment(
data = tibble(y = 0, x1 = as.Date("2019-10-01"), x2 = as.Date("2022-09-01")),
aes(x = x1, xend = x2, y = y, yend = y),
inherit.aes = FALSE,
color = "grey40",
size = .4
) +
geom_line() +
labs(
x = NULL,
y = NULL,
title = "Stop-and-search cases surged in mid 2020 over the past 36 months",
caption = "Source: UK Metropolitan Police"
) +
scale_x_date(limits = c(as.Date("2019-10-01"), as.Date("2023-08-01")),
date_labels = "%y-%b",
breaks = c(as.Date("2019-11-01"), as.Date("2020-05-01"),
as.Date("2020-11-01"), as.Date("2021-05-01"),
as.Date("2021-11-01"), as.Date("2022-05-01")))+
geom_text_repel(
aes(color = controlled_drugs, label = label),
xlim = c(as.Date("2022-10-01"), NA),
family = "Lato",
fontface = "bold",
size = 4,
segment.size = .7,
segment.alpha = .5,
segment.linetype = "dotted"
) +
theme(legend.position = "none",
plot.title.position = "plot",
plot.caption=element_text(hjust = 0.65))
## Plot. 2
Over 80% of “Controlled drugs,”Offensive weapons”, “Stolen goods”.
Grouping the records by objects of search, we can see that each group’s points cluster at the center of the city of London.
alldata %>%
group_by(object_of_search) %>%
summarize(cnt = n()) %>%
mutate(perc = cnt / sum(cnt)) %>%
slice_max(perc, n = 5)## # A tibble: 5 × 3
## object_of_search cnt perc
## <chr> <int> <dbl>
## 1 Controlled drugs 851687 0.592
## 2 Offensive weapons 189071 0.131
## 3 Stolen goods 122620 0.0853
## 4 <NA> 110137 0.0766
## 5 Article for use in theft 60161 0.0418
ggplot() +
geom_sf(data = london_wgs84, fill = "#3B454A", size = 0.125, colour = "#b2b2b277") +
# add points from dtop and search shapefile
geom_sf(
data = get_sf(offence %>% filter(year == 2020 & month == 5)),
aes(fill = object_of_search),
color = "white", size = 1.5, alpha = 0.7, shape = 21,
show.legend = FALSE
) +
hrbrthemes::theme_ft_rc(grid = "", strip_text_face = "bold") +
theme(axis.text = element_blank()) +
theme(strip.text = element_text(color = "white")) +
theme_minimal() +
coord_sf(datum = NA) + # remove coord
facet_wrap(~object_of_search) +
labs(title = "Locations of Stop&Search in London, 2020 May") +
NULLggplot() +
geom_sf(data = london_wgs84, fill = "#3B454A", size = 0.125, colour = "#b2b2b277") +
# add points from dtop and search shapefile
geom_sf(
data = get_sf(offence %>% filter(year == 2020 & month == 6)),
aes(fill = object_of_search),
color = "white", size = 1.5, alpha = 0.7, shape = 21,
show.legend = FALSE
) +
hrbrthemes::theme_ft_rc(grid = "", strip_text_face = "bold") +
theme(axis.text = element_blank()) +
theme(strip.text = element_text(color = "white")) +
theme_minimal() +
coord_sf(datum = NA) + # remove coord
facet_wrap(~object_of_search) +
labs(title = "Locations of Stop&Search in London, 2020 June") +
NULLThe West End had most of the stop-and-search cases in Jun, 2020. St. James’s is the second most “stop-and-searched” district.
get_distri <- function(data, year, month) {
temp <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
get_sf(data %>% filter(year == year &
month == month)))))
return(temp)
}
tmap::tmap_mode("view") # interactive map
tmap::tm_shape(get_distri(offence, 2020, 6)) +
tm_polygons("count")